C/C++ Users Group Library 1996 July

home *** CD-ROM | disk | FTP | other *** search

/ C/C++ Users Group Library 1996 July / C-C++ Users Group Library July 1996.iso / vol_300 / 333_02 / awktab.y < prev next >

Wrap

Text File | 1989-02-18 | 28.8 KB | 1,293 lines

/***************************************************************************/ /* gawk -- GNU version of awk */ /* YACC input file to create the gAWK semantic parser */ /* */ /* Copyright (C) 1986 Free Software Foundation */ /* Written by Paul Rubin, August 1986 */ /* */ /***************************************************************************/ /* */ /* GAWK is distributed in the hope that it will be useful, but WITHOUT ANY */ /* WARRANTY. No author or distributor accepts responsibility to anyone */ /* for the consequences of using it or for whether it serves any */ /* particular purpose or works at all, unless he says so in writing. */ /* Refer to the GAWK General Public License for full details. */ /* */ /* Everyone is granted permission to copy, modify and redistribute GAWK, */ /* but only under the conditions described in the GAWK General Public */ /* License. A copy of this license is supposed to have been given to you */ /* along with GAWK so you can know your rights and responsibilities. It */ /* should be in a file named COPYING. Among other things, the copyright */ /* notice and this notice must be preserved on all copies. */ /* */ /* In other words, go ahead and share GAWK, but don't try to stop */ /* anyone else from sharing it farther. Help stamp out software hoarding! */ /* */ /***************************************************************************/ %{ #define YYDEBUG 12 #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <string.h> #include "awk.h" STATIC int NEAR PASCAL yylex(void); STATIC int NEAR PASCAL parse_escape(char **string_ptr); /* The following variable is used for a very sickening thing. The awk */ /* language uses white space as the string concatenation operator, but */ /* having a white space token that would have to appear everywhere in all */ /* the grammar rules would be unbearable. It turns out we can return */ /* CONCAT_OP exactly when there really is one, just from knowing what */ /* kinds of other tokens it can appear between (namely, constants, */ /* variables, or close parentheses). This is because concatenation has */ /* the lowest priority of all operators. want_concat_token is used to */ /* remember that something that could be the left side of a concat has */ /* just been returned. If anyone knows a cleaner way to do this (don't */ /* look at the Un*x code to find one, though), please suggest it. */ static int want_concat_token; /* Two more horrible kludges. The same comment applies to these two too */ static int want_regexp = 0; /* lexical scanning kludge */ int lineno = 1; /* JF for error msgs */ /* During parsing of a gAWK program, the pointer to the next character */ /* is in this variable. */ char *lexptr; char *lexptr_begin; %} %union { long lval; AWKNUM fval; NODE *nodeval; int nodetypeval; char *sval; NODE *(PASCAL *ptrval)(NODE *); } %type <nodeval> exp start program rule pattern conditional regexp %type <nodeval> action variable redirect_in redirect_out exp_list builtin %type <nodeval> statements statement if_statement opt_exp %type <nodetypeval> whitespace relop %token <sval> NAME REGEXP YSTRING %token <lval> ERROR INCDEC %token <fval> NUMBER %token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP %token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE %token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE LEX_DELETE %token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT %token <nodetypeval> RELOP_EQ RELOP_GEQ RELOP_LEQ RELOP_NEQ REDIR_APPEND %token LEX_IN %token <lval> LEX_AND LEX_OR INCREMENT DECREMENT %token <ptrval> LEX_BUILTIN LEX_MATCH_FUNC LEX_SUB_FUNC LEX_SPLIT_FUNC %token <ptrval> LEX_GETLINE /* Lowest to highest */ %right ASSIGNOP %left ',' %right '?' ':' %left LEX_OR %left LEX_AND %left CONCAT_OP %nonassoc MATCHOP '>' '<' RELOP_EQ RELOP_GEQ RELOP_LEQ RELOP_NEQ %left '+' '-' %left '*' '/' '%' %right UNARY %right '^' %% start : optional_newlines program { expression_value = $2; } ; program : rule { $$ = node($1, NODE_RULE_LIST, NULL); } | program rule { /* cons the rule onto the tail of list */ $$ = append_right($1, node($2, NODE_RULE_LIST, NULL)); } ; rule : pattern action NEWLINE optional_newlines { $$ = node($1, NODE_RULE_NODE, $2); } ; pattern : /* empty */ { $$ = NULL; } | LEX_BEGIN { $$ = node(NULL, NODE_K_BEGIN, NULL); } | LEX_END { $$ = node(NULL, NODE_K_END, NULL); } | conditional { $$ = $1; } | conditional ',' conditional { $$ = mkrangenode(node($1, NODE_COND_PAIR, $3)); } ; conditional : '!' conditional %prec UNARY { $$ = node($2, NODE_NOT, NULL); } | '(' exp_list ')' CONCAT_OP LEX_IN NAME { $$ = node(variable($6), NODE_MEMBER_COND, $2); } | exp CONCAT_OP LEX_IN NAME { $$ = node(variable($4), NODE_MEMBER_COND, $1); } | conditional LEX_AND conditional { $$ = node($1, NODE_AND, $3); } | conditional LEX_OR conditional { $$ = node ($1, NODE_OR, $3); } | '(' conditional ')' { $$ = $2; want_concat_token = 0; } | regexp { $$ = $1; } | exp MATCHOP regexp { $$ = node($1, $2, $3); } | exp MATCHOP variable { $$ = node($1, $2, $3); } | exp relop exp { $$ = node($1, $2, $3); } ; action : /* empty */ { $$ = NULL; } | '{' whitespace statements '}' { $$ = $3; } ; statements : statement { $$ = node($1, NODE_STATEMENT_LIST, NULL); } | statements statement { $$ = append_right($1, node($2, NODE_STATEMENT_LIST, NULL)); } ; statement_term : NEWLINE optional_newlines { $<nodetypeval>$ = NODE_ILLEGAL; } | ';' optional_newlines { $<nodetypeval>$ = NODE_ILLEGAL; } ; regexp : '/' { ++want_regexp; } REGEXP '/' { want_regexp = 0; $$ = node(NULL, NODE_REGEXP, (NODE *) make_regexp($3)); } relop : '>' { $$ = NODE_GREATER; } | '<' { $$ = NODE_LESS; } | RELOP_EQ { $$ = NODE_EQUAL; } | RELOP_GEQ { $$ = NODE_GEQ; } | RELOP_LEQ { $$ = NODE_LEQ; } | RELOP_NEQ { $$ = NODE_NOTEQUAL; } ; whitespace : /* blank */ { $$ = NODE_ILLEGAL; } | CONCAT_OP | NEWLINE | whitespace CONCAT_OP | whitespace NEWLINE ; statement : '{' whitespace statements '}' whitespace { $$ = $3; } | if_statement { $$ = $1; } | LEX_WHILE '(' conditional ')' whitespace statement { $$ = node($3, NODE_K_WHILE, $6); } | LEX_FOR '(' opt_exp ';' conditional ';' opt_exp ')' whitespace statement { $$ = node($10, NODE_K_FOR, (NODE *) make_for_loop($3, $5, $7)); } | LEX_FOR '(' opt_exp ';' ';' opt_exp ')' whitespace statement { $$ = node($9, NODE_K_FOR, (NODE *) make_for_loop($3, NULL, $6)); } | LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')' whitespace statement { $$ = node($9, NODE_K_ARRAYFOR, (NODE *) make_for_loop(variable($3), NULL, variable($6))); } | LEX_BREAK statement_term /* for break, maybe we'll have to remember where to break to */ { $$ = node(NULL, NODE_K_BREAK, NULL); } | LEX_CONTINUE statement_term /* similarly */ { $$ = node(NULL, NODE_K_CONTINUE, NULL); } | LEX_PRINT exp_list redirect_out statement_term { $$ = node($2, NODE_K_PRINT, $3); } | LEX_PRINT '(' exp_list ')' /* BW: print(...) */ { want_concat_token = FALSE; } redirect_out statement_term { $$ = node($3, NODE_K_PRINT, $6); } | LEX_PRINTF exp_list redirect_out statement_term { $$ = node($2, NODE_K_PRINTF, $3); } | LEX_PRINTF '(' exp_list ')' { want_concat_token = FALSE; } redirect_out statement_term { $$ = node($3, NODE_K_PRINTF, $6); } | LEX_NEXT statement_term { $$ = node(NULL, NODE_K_NEXT, NULL); } | LEX_EXIT statement_term { $$ = node(NULL, NODE_K_EXIT, NULL); } | LEX_EXIT '(' exp ')' statement_term { $$ = node($3, NODE_K_EXIT, NULL); } | LEX_DELETE NAME '[' exp_list ']' statement_term { $$ = node(variable($2), NODE_K_DELETE, $4); } | exp statement_term { $$ = $1; } ; if_statement: LEX_IF '(' conditional ')' whitespace statement { $$ = node($3, NODE_K_IF, node($6, NODE_IF_BRANCHES, NULL)); } | LEX_IF '(' conditional ')' whitespace statement LEX_ELSE whitespace statement { $$ = node($3, NODE_K_IF, node($6, NODE_IF_BRANCHES, $9)); } ; optional_newlines : /* empty */ | optional_newlines NEWLINE { $<nodetypeval>$ = NODE_ILLEGAL; } ; redirect_out : /* empty */ { $$ = NULL; } | '>' YSTRING %prec UNARY { $$ = node(make_string($2, -1), NODE_REDIRECT_OUTPUT, NULL); } | '>' variable %prec UNARY { $$ = node($2, NODE_REDIRECT_OUTPUT, NULL); } | REDIR_APPEND YSTRING %prec UNARY { $$ = node(make_string($2, -1), NODE_REDIRECT_APPEND, NULL); } | REDIR_APPEND variable %prec UNARY { $$ = node($2, NODE_REDIRECT_APPEND, NULL); } ; redirect_in : /* empty */ { $$ = NULL; } | '<' YSTRING %prec UNARY { $$ = node(make_string($2, -1), NODE_REDIRECT_INPUT, NULL); } | '<' variable %prec UNARY { $$ = node($2, NODE_REDIRECT_INPUT, NULL); } ; variable : NAME { $$ = variable($1); } | NAME '[' exp_list ']' { $$ = node(variable($1), NODE_SUBSCRIPT, $3); } | '$' exp %prec UNARY { $$ = node($2, NODE_FIELD_SPEC, NULL); } ; /* optional expression, as in for loop */ opt_exp : /* empty */ { $$ = NULL; } | exp { $$ = $1; } ; exp_list : /* empty */ { $$ = NULL; } | exp { $$ = node($1, NODE_EXPRESSION_LIST, NULL); } | exp_list ',' exp { $$ = append_right($1, node($3, NODE_EXPRESSION_LIST, NULL)); } ; exp : '-' exp %prec UNARY { $$ = node($2, NODE_UNARY_MINUS, NULL); } | variable ASSIGNOP exp { $$ = node($1, $2, $3); } | builtin | '(' exp ')' { $$ = $2; } | INCREMENT variable %prec UNARY { $$ = node($2, NODE_PREINCREMENT, NULL); } | DECREMENT variable %prec UNARY { $$ = node($2, NODE_PREDECREMENT, NULL); } | variable INCREMENT %prec UNARY { $$ = node($1, NODE_POSTINCREMENT, NULL); } | variable DECREMENT %prec UNARY { $$ = node($1, NODE_POSTDECREMENT, NULL); } | variable { $$ = $1; } | NUMBER { $$ = make_number($1); } | YSTRING { $$ = make_string($1, -1); } /* Binary operators in order of decreasing precedence. */ | exp '^' exp { $$ = node($1, NODE_EXPONENTIAL, $3); } | exp '*' exp { $$ = node($1, NODE_TIMES, $3); } | exp '/' exp { $$ = node($1, NODE_QUOTIENT, $3); } | exp '%' exp { $$ = node($1, NODE_MOD, $3); } | exp '+' exp { $$ = node($1, NODE_PLUS, $3); } | exp '-' exp { $$ = node($1, NODE_MINUS, $3); } /* Empty operator. See yylex for disgusting details. */ | exp CONCAT_OP exp { $$ = node($1, NODE_CONCAT, $3); } | conditional '?' exp ':' exp { $$ = node($1, NODE_CONDEXP, node($3, NODE_CONDEXP_BRANCHES, $5)); } ; builtin : LEX_BUILTIN '(' exp_list ')' { $$ = snode($3, NODE_BUILTIN, $1); } | LEX_BUILTIN { $$ = snode(NULL, NODE_BUILTIN, $1); } | LEX_GETLINE redirect_in { auto NODE *tmp; tmp = node(NULL, NODE_GETLINE, $2); $$ = snode(tmp, NODE_BUILTIN, $1); } | LEX_GETLINE variable redirect_in { auto NODE *tmp; tmp = node($2, NODE_GETLINE, $3); $$ = snode(tmp, NODE_BUILTIN, $1); } | LEX_SPLIT_FUNC '(' exp_list ')' { auto int num; num = count_arguments($3); if (num < 2 || num > 3) yyerror("Invalid number of arguments for function"); $$ = snode($3, NODE_BUILTIN, $1); } | LEX_SPLIT_FUNC '(' exp_list ',' regexp ')' { auto NODE *tmp; tmp = node($5, NODE_EXPRESSION_LIST, NULL); tmp = append_right($3, tmp); if (count_arguments($3) != 3) yyerror("Invalid number of arguments for function"); $$ = snode($3, NODE_BUILTIN, $1); } | LEX_MATCH_FUNC '(' exp ',' regexp ')' { auto NODE *tmp; tmp = node($5, NODE_EXPRESSION_LIST, NULL); tmp = node($3, NODE_EXPRESSION_LIST, tmp); $$ = snode(tmp, NODE_BUILTIN, $1); } | LEX_MATCH_FUNC '(' exp_list ')' { if (count_arguments($3) != 2) yyerror("Invalid number of arguments for function"); $$ = snode($3, NODE_BUILTIN, $1); } | LEX_SUB_FUNC '(' regexp ',' exp ',' exp ')' { auto NODE *tmp; tmp = node($7, NODE_EXPRESSION_LIST, NULL); tmp = node($5, NODE_EXPRESSION_LIST, tmp); tmp = node($3, NODE_EXPRESSION_LIST, tmp); $$ = snode(tmp, NODE_BUILTIN, $1); } | LEX_SUB_FUNC '(' regexp ',' exp ')' { auto NODE *tmp; tmp = make_number((AWKNUM) 0.0); tmp = node(tmp, NODE_FIELD_SPEC, NULL); tmp = node(tmp, NODE_EXPRESSION_LIST, NULL); tmp = node($5, NODE_EXPRESSION_LIST, tmp); tmp = node($3, NODE_EXPRESSION_LIST, tmp); $$ = snode(tmp, NODE_BUILTIN, $1); } | LEX_SUB_FUNC '(' exp_list ')' { auto int num; auto NODE *tmp; num = count_arguments($3); if (num < 2 || num > 3) yyerror("Invalid number of arguments for function"); if (num == 2) { tmp = make_number((AWKNUM) 0.0); tmp = node(tmp, NODE_FIELD_SPEC, NULL); append_right($3, node(tmp, NODE_EXPRESSION_LIST, NULL)); } $$ = snode($3, NODE_BUILTIN, $1); } ; %% struct token { char *operator; int value; int class; NODE *(PASCAL *ptr)(NODE *); }; /* Tokentab is sorted ascii ascending order, so it can be binary searched. */ /* DO NOT enter table entries out of order lest search can't find them. */ static struct token tokentab[] = { { "BEGIN", NODE_ILLEGAL, LEX_BEGIN, NULL }, { "END", NODE_ILLEGAL, LEX_END, NULL }, { "atan2", NODE_BUILTIN, LEX_BUILTIN, do_atan2 }, #ifndef FAST { "bp", NODE_BUILTIN, LEX_BUILTIN, do_bp }, #endif { "break", NODE_K_BREAK, LEX_BREAK, NULL }, { "close", NODE_BUILTIN, LEX_BUILTIN, do_close }, { "continue", NODE_K_CONTINUE, LEX_CONTINUE, NULL }, { "cos", NODE_BUILTIN, LEX_BUILTIN, do_cos }, { "delete", NODE_K_DELETE, LEX_DELETE, NULL }, { "else", NODE_ILLEGAL, LEX_ELSE, NULL }, { "exit", NODE_K_EXIT, LEX_EXIT, NULL }, { "exp", NODE_BUILTIN, LEX_BUILTIN, do_exp }, { "for", NODE_K_FOR, LEX_FOR, NULL }, { "getline", NODE_BUILTIN, LEX_GETLINE, do_getline }, { "gsub", NODE_BUILTIN, LEX_SUB_FUNC, do_gsub }, { "if", NODE_K_IF, LEX_IF, NULL }, { "in", NODE_ILLEGAL, LEX_IN, NULL }, { "index", NODE_BUILTIN, LEX_BUILTIN, do_index }, { "int", NODE_BUILTIN, LEX_BUILTIN, do_int }, { "length", NODE_BUILTIN, LEX_BUILTIN, do_length }, { "log", NODE_BUILTIN, LEX_BUILTIN, do_log }, { "lower", NODE_BUILTIN, LEX_BUILTIN, do_lower }, { "match", NODE_BUILTIN, LEX_MATCH_FUNC, do_match }, { "next", NODE_K_NEXT, LEX_NEXT, NULL }, { "print", NODE_K_PRINT, LEX_PRINT, NULL }, { "printf", NODE_K_PRINTF, LEX_PRINTF, NULL }, #ifndef FAST { "prvars", NODE_BUILTIN, LEX_BUILTIN, do_prvars }, #endif { "rand", NODE_BUILTIN, LEX_BUILTIN, do_rand }, { "reverse", NODE_BUILTIN, LEX_BUILTIN, do_reverse }, { "sin", NODE_BUILTIN, LEX_BUILTIN, do_sin }, { "split", NODE_BUILTIN, LEX_SPLIT_FUNC, do_split }, { "sprintf", NODE_BUILTIN, LEX_BUILTIN, do_sprintf }, { "sqrt", NODE_BUILTIN, LEX_BUILTIN, do_sqrt }, { "srand", NODE_BUILTIN, LEX_BUILTIN, do_srand }, { "sub", NODE_BUILTIN, LEX_SUB_FUNC, do_sub }, { "substr", NODE_BUILTIN, LEX_BUILTIN, do_substr }, { "system", NODE_BUILTIN, LEX_BUILTIN, do_system }, { "upper", NODE_BUILTIN, LEX_BUILTIN, do_upper }, { "while", NODE_K_WHILE, LEX_WHILE, NULL } }; /* Read one token, getting characters through lexptr. */ STATIC int NEAR PASCAL yylex(void) { register int c; register int namelen; register char *tokstart; static int last_tok_1 = 0; static int last_tok_2 = 0; static int did_newline = 0; /* JF the grammar insists that */ /* actions end with newlines. */ auto int do_concat; auto int seen_e = 0; /* These are for numbers */ auto int seen_point = 0; auto int next_tab; retry: if(!lexptr) return(0); if (want_regexp) { /* there is a potential bug if a regexp is followed by an equal */ /* sign: "/foo/=bar" would result in assign_quotient being returned */ /* as the next token. Nothing is done about it since it is not */ /* valid awk, but maybe something should be done anyway. */ want_regexp = 0; tokstart = lexptr; while (c = *lexptr++) { switch (c) { case '\\': if (*lexptr++ == EOS) { yyerror ("unterminated regexp ends with \\"); return(ERROR); } break; case '/': /* end of the regexp */ lexptr--; yylval.sval = tokstart; return(REGEXP); case '\n': case EOS: yyerror("unterminated regexp"); return(ERROR); } } } do_concat = want_concat_token; want_concat_token = 0; if (*lexptr == EOS) { lexptr = NULL; return(NEWLINE); } /* if lexptr is at white space between two terminal tokens or parens, */ /* it is a concatenation operator. */ if (do_concat && (*lexptr == ' ' || *lexptr == '\t')) { while (*lexptr == ' ' || *lexptr == '\t') lexptr++; if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '(' || *lexptr == '.' || *lexptr == '$') return(CONCAT_OP); } while (*lexptr == ' ' || *lexptr == '\t') lexptr++; tokstart = lexptr; /* JF */ last_tok_1 = last_tok_2; last_tok_2 = *lexptr; switch (c = *lexptr++) { case 0: return(0); case '\n': ++lineno; if (',' == last_tok_1) /* BW: allow lines to be continued */ goto retry; /* at a comma */ return(NEWLINE); case '#': /* it's a comment */ while (*lexptr != '\n' && *lexptr != EOS) lexptr++; goto retry; case '\\': if (*lexptr == '\n') { ++lexptr; ++lineno; goto retry; } break; case ')': case ']': ++want_concat_token; /* intentional fall thru */ case '(': /* JF these were above, but I don't see why */ case '[': /* they should turn on concat. . . & */ case '{': case ',': /* JF */ case '$': case ';': case '?': case ':': /* set node type to ILLEGAL because the action should set it to */ /* the right thing */ yylval.nodetypeval = NODE_ILLEGAL; return(c); case '^': /* added by BW 12-12-88 */ if (*lexptr == '=') { yylval.nodetypeval = NODE_ASSIGN_EXPONENTIAL; ++lexptr; return(ASSIGNOP); } yylval.nodetypeval = NODE_ILLEGAL; return(c); case '*': if (*lexptr == '=') { yylval.nodetypeval = NODE_ASSIGN_TIMES; ++lexptr; return(ASSIGNOP); } yylval.nodetypeval = NODE_ILLEGAL; return(c); case '/': if (*lexptr == '=') { yylval.nodetypeval = NODE_ASSIGN_QUOTIENT; ++lexptr; return(ASSIGNOP); } yylval.nodetypeval = NODE_ILLEGAL; return(c); case '%': if (*lexptr == '=') { yylval.nodetypeval = NODE_ASSIGN_MOD; ++lexptr; return(ASSIGNOP); } yylval.nodetypeval = NODE_ILLEGAL; return(c); case '+': if (*lexptr == '=') { yylval.nodetypeval = NODE_ASSIGN_PLUS; ++lexptr; return(ASSIGNOP); } if (*lexptr == '+') { yylval.nodetypeval = NODE_ILLEGAL; ++lexptr; return(INCREMENT); } yylval.nodetypeval = NODE_ILLEGAL; return(c); case '!': if (*lexptr == '=') { yylval.nodetypeval = NODE_ILLEGAL; ++lexptr; return(RELOP_NEQ); } if (*lexptr == '~') { yylval.nodetypeval = NODE_NOMATCH; ++lexptr; return(MATCHOP); } yylval.nodetypeval = NODE_ILLEGAL; return(c); case '<': yylval.nodetypeval = NODE_ILLEGAL; if (*lexptr == '=') { ++lexptr; return(RELOP_LEQ); } return(c); case '=': if (*lexptr == '=') { yylval.nodetypeval = NODE_ILLEGAL; ++lexptr; return(RELOP_EQ); } yylval.nodetypeval = NODE_ASSIGN; return(ASSIGNOP); case '>': yylval.nodetypeval = NODE_ILLEGAL; if ('>' == *lexptr) { ++lexptr; return(REDIR_APPEND); } if (*lexptr == '=') { ++lexptr; return(RELOP_GEQ); } return(c); case '~': yylval.nodetypeval = NODE_MATCH; return(MATCHOP); case '}': if (did_newline) { did_newline = 0; return(c); } ++did_newline; --lexptr; return(NEWLINE); case '"': while (*lexptr != EOS) { switch (*lexptr++) { case '\\': if (*lexptr++ != EOS) break; /* fall through */ case '\n': yyerror("unterminated string"); return(ERROR); case '\"': yylval.sval = tokstart + 1; ++want_concat_token; return(YSTRING); } } return(ERROR); /* JF this was one level up, wrong? */ case '-': if (*lexptr == '=') { yylval.nodetypeval = NODE_ASSIGN_MINUS; ++lexptr; return(ASSIGNOP); } if (*lexptr == '-') { yylval.nodetypeval = NODE_ILLEGAL; ++lexptr; return(DECREMENT); } /* JF I think space tab comma and newline are the legal places */ /* for a UMINUS. Have I missed any? */ if ((!isdigit(*lexptr) && *lexptr!='.') || (lexptr > lexptr_begin + 1 && !index(" \t,\n",lexptr[-2]))) { /* set node type to ILLEGAL because the action should set */ /* it to the right thing */ yylval.nodetypeval = NODE_ILLEGAL; return(c); } /* FALL through into number code */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': namelen = ('-' == c) ? 1 : 0; for (; (c = tokstart[namelen]) != EOS; namelen++) { switch (c) { case '.': if (seen_point) goto got_number; ++seen_point; break; case 'e': case 'E': if (seen_e) goto got_number; ++seen_e; if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+') namelen++; break; default: if (c < '0' || c > '9') goto got_number; break; } } got_number: lexptr = tokstart + namelen; c = *lexptr; /* BW: some versions of */ *lexptr = EOS; /* atof() are very */ yylval.fval = (AWKNUM) atof(tokstart); /* picky about what */ *lexptr = c; /* follows a number. */ ++want_concat_token; return(NUMBER); case '&': if (*lexptr == '&') { yylval.nodetypeval = NODE_AND; ++lexptr; return(LEX_AND); } return(ERROR); case '|': if (*lexptr == '|') { yylval.nodetypeval = NODE_OR; ++lexptr; return(LEX_OR); } return(ERROR); } if (!isalpha(c)) { yyerror("Invalid char '%c' in expression\n", c); return(ERROR); } /* its some type of name-type-thing. Find its length */ for (namelen = 0; is_identchar(tokstart[namelen]); namelen++) ; /* See if it is a special token. binary search added by BW */ { register int x; auto char tok1; auto int l = 0, r = MAXDIM(tokentab) - 1; do { x = (l + r) / 2; tok1 = tokentab[x].operator[0]; if (tok1 == *tokstart) { tok1 = memcmp(tokstart, tokentab[x].operator, namelen); if (0 == tok1 && EOS == tokentab[x].operator[namelen]) break; if (tok1 > 0) l = x + 1; else r = x - 1; } else { if (*tokstart < tok1) r = x - 1; else l = x + 1; } } while (l <= r); if (l <= r) { lexptr = tokstart + namelen; if (NODE_BUILTIN == tokentab[x].value) yylval.ptrval = tokentab[x].ptr; else yylval.nodetypeval = tokentab[x].value; return(tokentab[x].class); } } /* It's a name. See how long it is. */ yylval.sval = tokstart; lexptr = tokstart+namelen; ++want_concat_token; return(NAME); } void yyerror(char *mesg, ...) { register char *ptr, *beg; auto va_list ap; auto int colno = 0; auto int i, next_tab; /* Find the current line in the input file */ if (!lexptr) { beg = "(END OF FILE)"; ptr = beg + strlen(beg); } else { if (*lexptr == '\n' && lexptr != lexptr_begin) --lexptr; for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg) ; for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++) ; if (beg != lexptr_begin) ++beg; i = 0; while (beg + i <= lexptr) { if ('\t' == beg[i]) { next_tab = colno % TABSTOPS; colno += (next_tab ? next_tab : TABSTOPS); } else ++colno; ++i; } } fprintf(stderr, "\n'%.*s'\n", ptr - beg, beg); fprintf(stderr, "%*s\n", colno + 1, "^"); /* figure out line number, etc. later */ va_start(ap, mesg); vfprintf(stderr, mesg, ap); va_end(ap); fprintf(stderr, " near line %d column %d\n", lineno, colno); exit(1); } /* Parse a C escape sequence. STRING_PTR points to a variable containing a pointer to the string to parse. That pointer is updated past the characters we use. The value of the escape sequence is returned. A negative value means the sequence \ newline was seen, which is supposed to be equivalent to nothing at all. If \ is followed by a null character, we return a negative value and leave the string pointer pointing at the null character. If \ is followed by 000, we return 0 and leave the string pointer after the zeros. A value of 0 does not mean end of string. */ STATIC int NEAR PASCAL parse_escape(char **string_ptr) { register int c = *(*string_ptr)++; switch (c) { case 'a': return('\a'); case 'b': return('\b'); case 'e': return(033); case 'f': return('\f'); case 'n': return('\n'); case 'r': return('\r'); case 't': return('\t'); case 'v': return('\v'); case '\n': return(-2); case 0: (*string_ptr)--; return(0); case '^': c = *(*string_ptr)++; if (c == '\\') c = parse_escape(string_ptr); if (c == '?') return(0177); return((c & 0200) | (c & 037)); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { register int i = c - '0'; register int count = 0; while (++count < 3) { if ((c = *(*string_ptr)++) >= '0' && c <= '7') { i *= 8; i += c - '0'; } else { (*string_ptr)--; break; } } return(i); } default: return(c); } }